Go back to the Preprocessing page; that link may be useful for keeping track of the files created during preprocessing.
Let us set some global options for all code chunks in this document.
knitr::opts_chunk$set(
message = FALSE, # Suppress messages printed by R code chunks
warning = FALSE, # Suppress warnings printed by R code chunks
echo = TRUE, # Show R code within code chunks in output
include = TRUE, # Include both R code and its results in output
eval = TRUE, # Evaluate R code chunks
cache = FALSE, # Disable caching of R code chunks (every render re-runs all chunks)
fig.align = "center", # Center figures in the rendered output
out.width = "100%", # Scale figures to the full page width
retina = 2, # Render figures at 2x resolution for high-DPI displays
error = TRUE, # Continue rendering even if a chunk raises an error
collapse = FALSE # Keep source and output in separate blocks
)
rm(list = ls()) # Start from an empty workspace
set.seed(1982) # Fix the RNG seed for reproducibility
# Install R-INLA package (uncomment the lines below to run once)
# install.packages("INLA",repos = c(getOption("repos"),INLA ="https://inla.r-inla-download.org/R/testing"), dep = TRUE)
# Update R-INLA package
# inla.upgrade(testing = TRUE)
# Install inlabru package
# remotes::install_github("inlabru-org/inlabru", ref = "devel")
# Install rSPDE package
# remotes::install_github("davidbolin/rspde", ref = "devel")
# Install MetricGraph package
# remotes::install_github("davidbolin/metricgraph", ref = "devel")
library(INLA)
library(inlabru)
library(rSPDE)
library(MetricGraph)
library(plotly)
library(dplyr)
library(tidyr)
library(sf)
library(here) # here() starts from the home directory
library(rmarkdown)
# NOTE(review): the workspace was already cleared and the seed already set
# above; this repetition is likely a leftover from chunk reorganization.
rm(list = ls()) # Clear the workspace
set.seed(1982) # Set seed for reproducibility

The initial data was downloaded from this
link and stored in path_to_initial_data.
# Run source1.R, which contains instructions to create a string
# (path_to_initial_data) with the path to the initial data
source(here("source_folder/source1.R"))
# Read the raw data (stored at path_to_initial_data)
# NOTE(review): ~36M rows — read.csv may be slow here; data.table::fread or
# arrow could speed this up if the dependency is acceptable.
raw <- read.csv(path_to_initial_data)
# Show the dimensions of the raw data (rows, columns)
raw |> dim()
## [1] 36163436 6
# Fleet vehicle numbers according to Wikipedia.
# Consecutive series are written as single ranges:
# 8501:8560 covers the 8501-8530 and 8531-8560 batches,
# 8701:8780 covers the 8701-8750 and 8751-8780 batches.
busesnumber <- c(
  8501:8560,
  8601:8662,
  8701:8780,
  8800:8969,
  6500:6554,
  6560:6697,
  6700:6730,
  5701:5885,
  7201:7293
)
# Filter and prepare data. The resulting dataset contains PDT, ID, speed,
# datetime, day, and hour variables (plus the sf geometry column).
january <- raw %>%
  filter(Vehicle.ID %in% busesnumber) %>% # Keep only known bus vehicle IDs
  filter(Latitude > 37.7, Latitude < 37.815, Longitude > -122.52, Longitude < -122.36) %>% # Keep observations inside the area of interest (bounding box)
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>% # Transform to an sf object (WGS84)
  mutate(datetime = as.POSIXct(strptime(Position.Date.Time, format = "%m/%d/%Y %I:%M:%S %p"))) %>% # Parse timestamp; wrap in as.POSIXct() because strptime() returns POSIXlt, which is unsafe as a data-frame column
  mutate(day = as.integer(format(datetime, format = "%d")), hour = as.integer(format(datetime, format = "%H"))) %>% # Extract day-of-month and hour of day
  filter(Average.Speed < 73) %>% # Remove atypical speed observations. NOTE(review): speeds are still in mph at this point (converted on the next line), so 73 is an mph threshold; the "10 kph above the limit" rationale may need rechecking
  mutate(Average.Speed = Average.Speed * 1.60934) %>% # Transform from mph to kph
  select(-Heading) %>% # Remove the Heading variable
  rename(speed = Average.Speed, ID = Vehicle.ID, PDT = Position.Date.Time) # Shorter names: easier to write downstream
# Persist the cleaned january dataset for the downstream analysis pages.
save(january, file = here("data_files", "january.RData"))